library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(lmerTest)
##
## Attaching package: 'lmerTest'
##
## The following object is masked from 'package:lme4':
##
## lmer
##
## The following object is masked from 'package:stats':
##
## step
library(ggplot2)
#Summary:
#Multilevel mixed regression models coupled with time lag framework was used to access the effect of key malaria indicators: Parasite rate, PR (transmission intensity) and Drug use (Proportion of fevered children treated with CQ, SP and ACTs) on the reported prevalence of key drug (CQ, SP and ACTs) resistant markers: pfcrt-K76T, pfmdr1-N86Y, pfdhps-A437G, pfdhfr-S108N and pfk13-C580Y. independently.
###PFCRT K76T
#Prepare Database
cq<- read_csv("CQ_imputed.csv")
## Rows: 782 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (4): year, mean_cq, du_obs, num_obs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Set up lag years
cq<-cq%>%group_by(country)%>%mutate(drug_1yr_ago = lag(mean_cq,n=1),drug_2yr_ago = lag(mean_cq,n=2), drug_3yr_ago = lag(mean_cq,n=3),drug_4yr_ago = lag(mean_cq,n=4),drug_5yr_ago = lag(mean_cq,n=5))
# join original observation
crt<-read_csv ("pfcrt_SNP.csv")%>%left_join(cq)
## Rows: 1079 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): country, Region, displayName, layerName, layerTitle, value, site, ...
## dbl (15): lat, long, year, study Id, site number, study start, study end, te...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(country, year)`
crt_na_rem <- crt %>% filter(!is.na(mean_cq))
crt_na_rem$Region[is.na(crt_na_rem$Region)] <- "West Africa"
crt_na_rem<- crt_na_rem%>% mutate(PR = PR / 100, Prev_DR = Prev_DR / 100)
write_csv(crt_na_rem, file = "pfcrt_database.csv")
###Model
Mod2_pfcrt <- lmer(Prev_DR ~ PR +mean_cq+PR *mean_cq +drug_1yr_ago+drug_2yr_ago+drug_3yr_ago+(1|country), weights=log(tested),data=crt_na_rem)
summary(Mod2_pfcrt)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Prev_DR ~ PR + mean_cq + PR * mean_cq + drug_1yr_ago + drug_2yr_ago +
## drug_3yr_ago + (1 | country)
## Data: crt_na_rem
## Weights: log(tested)
##
## REML criterion at convergence: Inf
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.81974 -0.51812 0.06087 0.64935 2.90543
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 0.1450 0.3808
## Residual 0.2337 0.4834
## Number of obs: 520, groups: country, 31
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.34528 0.07521 25.19546 4.591 0.000106 ***
## PR 0.56906 0.10367 499.94860 5.489 6.43e-08 ***
## mean_cq -0.08128 0.60708 496.85225 -0.134 0.893546
## drug_1yr_ago 1.47539 0.83313 496.15865 1.771 0.077193 .
## drug_2yr_ago -1.21082 0.99563 492.13214 -1.216 0.224514
## drug_3yr_ago 1.00150 0.52844 490.47487 1.895 0.058655 .
## PR:mean_cq -1.32604 0.51150 498.69119 -2.592 0.009808 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PR men_cq drg_1_ drg_2_ drg_3_
## PR -0.166
## mean_cq -0.075 0.217
## drug_1yr_ag 0.008 0.084 -0.638
## drug_2yr_ag 0.031 -0.084 -0.056 -0.631
## drug_3yr_ag -0.050 -0.067 0.191 0.326 -0.893
## PR:mean_cq 0.128 -0.648 -0.383 -0.041 0.091 0.020
## optimizer (nloptwrap) convergence code: 0 (OK)
## Gradient contains NAs
plot(Mod2_pfcrt)

qqnorm(residuals(Mod2_pfcrt))

#Interpretation:
#The effect of CQ use on the prevalence of pfcrt-K76T mutation over 3 years lag period, suggested that CQ use negatively influenced pfcrt-K76T mutation in 3years, but this was not found to be significant. On the other hand, Parasite rate, and interaction between Parasite rate and CQ use were significantly (p<0.001 and p<0.05) associated with pfcrt-K76T mutatation, but with positive and negative estimates, respectively.
##Plots
crt_na_rem<- crt_na_rem%>% mutate(PR = PR * 100, Prev_DR = Prev_DR * 100)
#### 1. Plot of Pfcrt_76T Prevalence vs Mean_cq_All regions
ggplot(crt_na_rem, aes(cut(mean_cq,10),Prev_DR,col=Region))+geom_boxplot()

ggsave("pfcrt_76T_p1.png")
## Saving 7 x 5 in image
### 2. Plot of Pfcrt_76T Prevalence vs PR_All regions
ggplot(crt_na_rem, aes(cut(PR,10),Prev_DR,col=Region))+geom_boxplot()

ggsave("pfcrt_76T_p2.png")
## Saving 7 x 5 in image
### 3. Plot of the relationship between Region and Year varied between pfcrt 76T Prevalence.
ggplot(data = crt_na_rem %>% group_by(Region,year) %>% mutate(mean=mean(Prev_DR)), aes (y = Region, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="Pfcrt 76T Prevalence") + theme_classic() + xlab ("Year") + ylab ("Region")

ggsave("pfcrt 76T_Timep1.png")
## Saving 7 x 5 in image
### 4. Plot of the relationship between Region and Year varied between Parasite rate.
ggplot(data = crt_na_rem %>% group_by(Region,year) %>% mutate(mean=mean(PR)), aes (y = Region, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="PR") + theme_classic() + xlab ("Year") + ylab ("Region")

ggsave("pfcrt 76T_Timep2.png")
## Saving 7 x 5 in image
### 5. Plot of the relationship between Region and Year varied between Mean_cq.
ggplot(data = crt_na_rem %>% group_by(Region,year) %>% mutate(mean=mean(mean_cq)), aes (y = Region, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="Mean_cq") + theme_classic() + xlab ("Year") + ylab ("Region")

ggsave("pfcrt 76T_Timep3.png")
## Saving 7 x 5 in image
###PFMDR1 N86Y
#Prepare Database
# join original observation
mdr<-read_csv ("pfmdr1_SNP.csv")%>%left_join(cq)
## Rows: 873 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): country, Region, displayName, layerName, layerTitle, value, site, ...
## dbl (15): lat, long, year, study Id, site number, study start, study end, te...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(country, year)`
mdr_na_rem <- mdr %>% filter(!is.na(mean_cq))
mdr_na_rem$Region[is.na(mdr_na_rem$Region)] <- "West Africa"
mdr_na_rem<- mdr_na_rem%>% mutate(PR = PR / 100, Prev_DR = Prev_DR / 100)
write_csv(mdr_na_rem, file = "pfmdr1_database.csv")
###Model
Mod2_pfmdr1 <- lmer(Prev_DR ~ PR +mean_cq+PR *mean_cq+drug_1yr_ago+drug_2yr_ago+drug_3yr_ago+(1|country), weights=log(tested),data=mdr_na_rem)
summary(Mod2_pfmdr1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Prev_DR ~ PR + mean_cq + PR * mean_cq + drug_1yr_ago + drug_2yr_ago +
## drug_3yr_ago + (1 | country)
## Data: mdr_na_rem
## Weights: log(tested)
##
## REML criterion at convergence: Inf
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4013 -0.6235 -0.1075 0.4877 3.0595
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 0.06049 0.2459
## Residual 0.19774 0.4447
## Number of obs: 415, groups: country, 29
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.14918 0.05323 6.16089 2.802 0.03020 *
## PR 0.38442 0.11038 406.33601 3.483 0.00055 ***
## mean_cq 0.29638 0.65033 407.60150 0.456 0.64882
## drug_1yr_ago 0.02468 0.83140 405.01068 0.030 0.97633
## drug_2yr_ago 1.18008 1.00913 401.64871 1.169 0.24293
## drug_3yr_ago -0.49275 0.55182 397.61269 -0.893 0.37243
## PR:mean_cq -0.94726 0.60954 404.70844 -1.554 0.12095
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PR men_cq drg_1_ drg_2_ drg_3_
## PR -0.226
## mean_cq -0.102 0.275
## drug_1yr_ag 0.025 0.062 -0.604
## drug_2yr_ag 0.012 -0.036 -0.090 -0.602
## drug_3yr_ag -0.020 -0.157 0.151 0.305 -0.887
## PR:mean_cq 0.162 -0.673 -0.429 -0.067 0.080 0.101
## optimizer (nloptwrap) convergence code: 0 (OK)
## Gradient contains NAs
plot(Mod2_pfmdr1)

qqnorm(residuals(Mod2_pfmdr1))

#Interpretation:
#The effect of yearly lag in CQ use on the prevalence of pfmdr1-N86Y mutation over 3 years, suggests that the prevalence of pfmdr1-N86Y increased with an increase in CQ use, but this was not significant. Similar to pfcrt- K76T, Parasite rate also significantly (p<0.001) increased as pfmdr1-N86Y mutation increased. However, the interaction between Parasite rate and CQ use was negatively associated with pfmdr1-N86Y prevalence and this was not significant.
##Plots
mdr_na_rem<- mdr_na_rem%>% mutate(PR = PR * 100, Prev_DR = Prev_DR * 100)
#### 1. Plot of Pfmdr1_86Y Prevalence vs Mean_cq_All regions
ggplot(mdr_na_rem, aes(cut(mean_cq,10),Prev_DR,col=Region))+geom_boxplot()

ggsave("pfmdr1_86Y_p1.png")
## Saving 7 x 5 in image
### 2. Plot of Pfmdr1_86Y Prevalence vs PR_All regions
ggplot(mdr_na_rem, aes(cut(PR,10),Prev_DR,col=Region))+geom_boxplot()

ggsave("pfmdr1_86Y_p2.png")
## Saving 7 x 5 in image
### 3. Plot of the relationship between Region and Year varied between pfmdr1 86Y Prevalence.
ggplot(data = mdr_na_rem %>% group_by(Region,year) %>% mutate(mean=mean(Prev_DR)), aes (y = Region, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="Pfmdr1 86Y Prevalence") + theme_classic() + xlab ("Year") + ylab ("Region")

ggsave("pfmdr1 86Y_Timep1.png")
## Saving 7 x 5 in image
### 4. Plot of the relationship between Region and Year varied between Parasite rate.
ggplot(data = mdr_na_rem %>% group_by(Region,year) %>% mutate(mean=mean(PR)), aes (y = Region, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="PR") + theme_classic() + xlab ("Year") + ylab ("Region")

ggsave("pfmdr1 86Y_Timep2.png")
## Saving 7 x 5 in image
### 5. Plot of the relationship between Region and Year varied between Mean_cq.
ggplot(data = mdr_na_rem %>% group_by(Region,year) %>% mutate(mean=mean(mean_cq)), aes (y = Region, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="Mean_cq") + theme_classic() + xlab ("Year") + ylab ("Region")

ggsave("pfmdr1 86Y_Timep3.png")
## Saving 7 x 5 in image
###PFDHPS A437G
#Prepare Database
sp<- read_csv("SP_imputed.csv")
## Rows: 805 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (4): year, mean_sp, du_obs, num_obs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Set up lag years
sp<-sp%>%group_by(country)%>%mutate(drug_1yr_ago = lag(mean_sp,n=1),drug_2yr_ago = lag(mean_sp,n=2), drug_3yr_ago = lag(mean_sp,n=3),drug_4yr_ago = lag(mean_sp,n=4),drug_5yr_ago = lag(mean_sp,n=5))
# join original observation
dhps<-read_csv ("pfdhps_SNP.csv")%>%left_join(sp)
## Rows: 407 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): country, Region, displayName, layerName, layerTitle, value, site, ...
## dbl (16): lat, long, year, study id, substudy number, study start year, stud...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(country, year)`
dhps_na_rem <- dhps %>% filter(!is.na(mean_sp))
dhps_na_rem<- dhps_na_rem%>% mutate(PR = PR / 100, Prev_DR = Prev_DR / 100)
write_csv(dhps_na_rem, file = "pfdhps_database.csv")
###Model
Mod1_pfdhps <- lmer(Prev_DR ~ PR +mean_sp +PR *mean_sp+drug_1yr_ago+drug_2yr_ago+drug_3yr_ago+ (1|country), weights=log(tested),data=dhps_na_rem)
summary(Mod1_pfdhps)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Prev_DR ~ PR + mean_sp + PR * mean_sp + drug_1yr_ago + drug_2yr_ago +
## drug_3yr_ago + (1 | country)
## Data: dhps_na_rem
## Weights: log(tested)
##
## REML criterion at convergence: Inf
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7934 -0.5432 0.1363 0.5976 2.6641
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 0.1479 0.3846
## Residual 0.2000 0.4472
## Number of obs: 251, groups: country, 27
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.71429 0.08391 1.63551 8.513 0.0240 *
## PR -0.26998 0.14700 243.27483 -1.837 0.0675 .
## mean_sp -1.74290 2.61448 243.96815 -0.667 0.5056
## drug_1yr_ago -2.28540 4.34417 243.04598 -0.526 0.5993
## drug_2yr_ago 1.39360 3.62504 217.83528 0.384 0.7010
## drug_3yr_ago 0.41217 2.09015 237.08480 0.197 0.8438
## PR:mean_sp 6.65490 4.01963 226.75937 1.656 0.0992 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PR men_sp drg_1_ drg_2_ drg_3_
## PR -0.308
## mean_sp -0.037 0.104
## drug_1yr_ag -0.033 0.077 -0.836
## drug_2yr_ag 0.067 -0.100 0.250 -0.644
## drug_3yr_ag -0.105 0.113 0.223 0.083 -0.773
## PR:mean_sp 0.201 -0.657 -0.233 -0.044 0.106 -0.181
## optimizer (nloptwrap) convergence code: 0 (OK)
## Gradient contains NAs
plot(Mod1_pfdhps)

qqnorm(residuals(Mod1_pfdhps))

#Interpretation:
#SP use was found to negatively influence pfdhps-A437G prevalence at lead times of up to 3 years, howbeit not significantly. Similarly, Parasite rate negatively correlated with pfdhps-A437G and this was not significant. The interaction between Parasite rate and SP use did not significantly influence pfdhps-A437G prevalence either.
##Plots
dhps_na_rem<- dhps_na_rem%>% mutate(PR = PR * 100, Prev_DR = Prev_DR * 100)
#### 1. Plot of Pfdhps_437G Prevalence vs Mean_sp_All regions
ggplot(dhps_na_rem, aes(cut(mean_sp,10),Prev_DR,col=Region))+geom_boxplot()

ggsave("pfdhps_437G _p1.png")
## Saving 7 x 5 in image
### 2. Plot of Pfdhps_437G Prevalence vs PR_All regions
ggplot(dhps_na_rem, aes(cut(PR,10),Prev_DR,col=Region))+geom_boxplot()

ggsave("pfdhps_437G _p2.png")
## Saving 7 x 5 in image
### 3. Plot of the relationship between Region and Year varied between pfdhps 437G Prevalence.
ggplot(data = dhps_na_rem %>% group_by(Region,year) %>% mutate(mean=mean(Prev_DR)), aes (y = Region, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="Pfdhps 437G Prevalence") + theme_classic() + xlab ("Year") + ylab ("Region")

ggsave("pfdhps 437G_Timep1.png")
## Saving 7 x 5 in image
### 4. Plot of the relationship between Region and Year varied between Parasite rate.
ggplot(data = dhps_na_rem %>% group_by(Region,year) %>% mutate(mean=mean(PR)), aes (y = Region, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="PR") + theme_classic() + xlab ("Year") + ylab ("Region")

ggsave("pfdhps 437G_Timep2.png")
## Saving 7 x 5 in image
### 5. Plot of the relationship between Region and Year varied between Mean_sp.
ggplot(data = dhps_na_rem %>% group_by(Region,year) %>% mutate(mean=mean(mean_sp)), aes (y = Region, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="Mean_sp") + theme_classic() + xlab ("Year") + ylab ("Region")

ggsave("pfdhps 437G_Timep3.png")
## Saving 7 x 5 in image
###PFDHFR S108N
#Prepare Database
# join original observation
dhfr<-read_csv ("pfdhfr_SNP.csv")%>%left_join(sp)
## Rows: 420 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): country, Region, displayName, layerName, layerTitle, value, site, ...
## dbl (16): lat, long, year, study id, substudy number, study start year, stud...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(country, year)`
dhfr_na_rem <- dhfr %>% filter(!is.na(mean_sp))
dhfr_na_rem <- dhfr_na_rem%>% mutate(PR = PR / 100, Prev_DR = Prev_DR / 100)
write_csv(dhfr_na_rem, file = "pfdhfr_database.csv")
###Model
Mod1_pfdhfr <- lmer(Prev_DR ~ PR +mean_sp+PR *mean_sp+drug_1yr_ago+drug_2yr_ago+drug_3yr_ago+ (1|country), weights=log(tested),data=dhfr_na_rem)
summary(Mod1_pfdhfr)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Prev_DR ~ PR + mean_sp + PR * mean_sp + drug_1yr_ago + drug_2yr_ago +
## drug_3yr_ago + (1 | country)
## Data: dhfr_na_rem
## Weights: log(tested)
##
## REML criterion at convergence: Inf
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6367 -0.2827 0.1835 0.5600 1.6239
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 0.03555 0.1886
## Residual 0.14551 0.3815
## Number of obs: 255, groups: country, 25
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.94684 0.05025 6.31512 18.841 8.67e-07 ***
## PR -0.41996 0.12462 196.00306 -3.370 0.000906 ***
## mean_sp -0.37253 2.16069 214.29345 -0.172 0.863275
## drug_1yr_ago -1.11633 3.56233 226.36103 -0.313 0.754289
## drug_2yr_ago -4.79563 3.06052 247.38813 -1.567 0.118409
## drug_3yr_ago 3.77976 1.69187 187.53177 2.234 0.026660 *
## PR:mean_sp 9.98689 3.33802 247.71553 2.992 0.003053 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PR men_sp drg_1_ drg_2_ drg_3_
## PR -0.458
## mean_sp -0.069 0.121
## drug_1yr_ag -0.043 0.063 -0.830
## drug_2yr_ag 0.092 -0.086 0.263 -0.668
## drug_3yr_ag -0.151 0.109 0.169 0.161 -0.799
## PR:mean_sp 0.308 -0.670 -0.228 -0.048 0.102 -0.178
## optimizer (nloptwrap) convergence code: 0 (OK)
## Gradient contains NAs
plot(Mod1_pfdhfr)

qqnorm(residuals(Mod1_pfdhfr))

#Interpretation:
#Like in the case of pfdhps-A437G prevalence, the effect of SP use on the prevalence of pfdhfr-S108N mutation over 3 years lag period suggested that SP use negatively influenced pfdhfr-S108N mutation in 3years, but this was not found to be significant. On the other hand, Parasite rate, and the interaction between Parasite rate and SP use were significantly (p<0.001 and p<0.05) associated with pfdhfr-S108N prevalence, but with negative and positive estimates, respectively.
###Plots
dhfr_na_rem <- dhfr_na_rem%>% mutate(PR = PR * 100, Prev_DR = Prev_DR * 100)
#### 1. Plots of Pfdhfr_108N Prevalence vs Mean_sp_All regions
ggplot(dhfr_na_rem, aes(cut(mean_sp,10),Prev_DR,col=Region))+geom_boxplot()

ggsave("pfdhfr_108N_p1.png")
## Saving 7 x 5 in image
### 2. Plots of Pfdhfr_108N Prevalence vs PR_All regions
ggplot(dhfr_na_rem, aes(cut(PR,10),Prev_DR,col=Region))+geom_boxplot()

ggsave("pfdhfr_108N_p2.png")
## Saving 7 x 5 in image
### 3. Plot of the relationship between Region and Year varied between pfdhps 108N Prevalence.
ggplot(data = dhfr_na_rem %>% group_by(Region,year) %>% mutate(mean=mean(Prev_DR)), aes (y = Region, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="Pfdhfr 108N Prevalence") + theme_classic() + xlab ("Year") + ylab ("Region")

ggsave("pfdhfr 108N_Timep1.png")
## Saving 7 x 5 in image
### 4. Plot of the relationship between Region and Year varied between Parasite rate.
ggplot(data = dhfr_na_rem %>% group_by(Region,year) %>% mutate(mean=mean(PR)), aes (y = Region, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="PR") + theme_classic() + xlab ("Year") + ylab ("Region")

ggsave("pfdhfr 108N_Timep2.png")
## Saving 7 x 5 in image
### 5. Plot of the relationship between Region and Year varied between Mean_sp.
ggplot(data = dhfr_na_rem %>% group_by(Region,year) %>% mutate(mean=mean(mean_sp)), aes (y = Region, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="Mean_sp") + theme_classic() + xlab ("Year") + ylab ("Region")

ggsave("pfdhfr 108N_Timep3.png")
## Saving 7 x 5 in image
#PFK13 C580Y
act<- read_csv("ACT_imputed.csv")
## Rows: 828 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (5): year, mean_act, du_obs, num_obs, sum_data
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Set up lag years
act<-act%>%group_by(country)%>%mutate(drug_1yr_ago = lag(mean_act,n=1),drug_2yr_ago = lag(mean_act,n=2), drug_3yr_ago = lag(mean_act,n=3),drug_4yr_ago = lag(mean_act,n=4),drug_5yr_ago = lag(mean_act,n=5))
# join original observation
k13<-read_csv ("pfk13_SNP.csv")%>%left_join(act)
## Rows: 482 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): country, Region, displayName, layerName, layerTitle, continent, va...
## dbl (10): lat, long, year, value, tested, pubMedId, estLoc, present, PR, Pre...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(country, year)`
k13_na_rem <- k13 %>% filter(!is.na(mean_act))
k13_na_rem<- k13_na_rem %>% mutate(PR = PR / 100, Prev_DR = Prev_DR / 100)
write_csv(k13_na_rem, file = "pfk13_database.csv")
###Model
k13_na_rem <-k13_na_rem %>% filter(Prev_DR<1,Prev_DR>0)
Mod1_pfk13 <- lmer(Prev_DR ~ PR +mean_act+PR *mean_act +drug_1yr_ago+drug_2yr_ago+drug_3yr_ago+ (1|country), weights=log(tested),data=k13_na_rem)
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning: Some predictor variables are on very different scales: consider
## rescaling
summary(Mod1_pfk13)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## Prev_DR ~ PR + mean_act + PR * mean_act + drug_1yr_ago + drug_2yr_ago +
## drug_3yr_ago + (1 | country)
## Data: k13_na_rem
## Weights: log(tested)
##
## REML criterion at convergence: 50.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.74881 -0.91367 -0.00882 0.77829 2.05899
##
## Random effects:
## Groups Name Variance Std.Dev.
## country (Intercept) 39.9147 6.3178
## Residual 0.3095 0.5563
## Number of obs: 196, groups: country, 2
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.1149 5.1622 0.6289 0.797 0.6261
## PR -0.5519 1.3075 181.9981 -0.422 0.6735
## mean_act 265.0385 108.6084 5.9586 2.440 0.0507 .
## drug_1yr_ago -305.0699 128.0824 5.0351 -2.382 0.0627 .
## drug_2yr_ago 29.7947 14.3126 188.0034 2.082 0.0387 *
## drug_3yr_ago -32.7775 21.8905 4.9086 -1.497 0.1956
## PR:mean_act 35.5695 68.3399 188.6952 0.520 0.6033
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PR men_ct drg_1_ drg_2_ drg_3_
## PR 0.095
## mean_act 0.446 0.255
## drug_1yr_ag -0.465 -0.244 -0.993
## drug_2yr_ag 0.002 0.008 0.026 -0.085
## drug_3yr_ag -0.470 -0.157 -0.767 0.834 -0.291
## PR:mean_act -0.033 -0.300 -0.081 0.079 -0.108 0.097
## fit warnings:
## Some predictor variables are on very different scales: consider rescaling
plot(Mod1_pfk13)

qqnorm(residuals(Mod1_pfk13))

#Interpretation:
#ACT use was found to positively influence pfk13-C580Y prevalence at lead times of up to 3 years, although not significantly. However, Parasite rate negatively correlated with pfk13-C580Y mutation and this was also not significant. The interaction between Parasite rate and ACT use did not significantly influence pfk13-C580Y prevalence either.
###Plots
k13_na_rem<- k13_na_rem %>% mutate(PR = PR * 100, Prev_DR = Prev_DR * 100)
#### 1. Plots of Pfk13_580Y Prevalence vs Mean_act_All regions
ggplot(k13_na_rem, aes(cut(mean_act,10),Prev_DR,col=country))+geom_boxplot()

ggsave("pfK13_580Y_p1.png")
## Saving 7 x 5 in image
### 2. Plots of Pfdhfr_108N Prevalence vs PR_All regions
ggplot(k13_na_rem, aes(cut(PR,10),Prev_DR,col=country))+geom_boxplot()

ggsave("pfk13_580Y_p2.png")
## Saving 7 x 5 in image
### 3. Plot of the relationship between Region and Year varied between pfk13 580Y Prevalence.
ggplot(data = k13_na_rem %>% group_by(country,year) %>% mutate(mean=mean(Prev_DR)), aes (y = country, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="Pfk13 580Y Prevalence") + theme_classic() + xlab ("Year") + ylab ("Country")

ggsave("pfk13 580Y_Timep1.png")
## Saving 7 x 5 in image
### 4. Plot of the relationship between Region and Year varied between Parasite rate.
ggplot(data = k13_na_rem %>% group_by(country,year) %>% mutate(mean=mean(PR)), aes (y = country, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="PR") + theme_classic() + xlab ("Year") + ylab ("Country")

ggsave("pfk13 580Y_Timep2.png")
## Saving 7 x 5 in image
### 5. Plot of the relationship between Region and Year varied between Mean_act.
ggplot(data = k13_na_rem %>% group_by(country,year) %>% mutate(mean=mean(mean_act)), aes (y = country, x = year,colour = mean)) + geom_point(aes(size=log(tested))) + scale_color_viridis_c(name="Mean_act") + theme_classic() + xlab ("Year") + ylab ("Country")

ggsave("pfk13 580Y_Timep3.png")
## Saving 7 x 5 in image